This file is used to analyse the immune cells dataset.
library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
library(org.Mm.eg.db)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
save_name = "hfsc"
out_dir = "."
We load the dataset :
sobj = readRDS(paste0(out_dir, "/", save_name, "_sobj.rds"))
sobj
## An object of class Seurat
## 15384 features across 1454 samples within 1 assay
## Active assay: RNA (15384 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_24_tsne, RNA_pca_24_umap, harmony, harmony_24_umap, harmony_24_tsne
We load the sample information :
sample_info = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_sample_info.rds"))
project_names_oi = sample_info$project_name
graphics::pie(rep(1, nrow(sample_info)),
col = sample_info$color,
labels = sample_info$project_name)
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1))
This is the projection of interest :
name2D = "harmony_24_tsne"
We visualize gene expression for some markers :
features = c("percent.mt", "percent.rb", "nFeature_RNA")
plot_list = lapply(features, FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We visualize clusters :
cluster_plot = Seurat::DimPlot(sobj, reduction = name2D, label = TRUE) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1)
cluster_plot
We visualize cluster split by sample :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "project_name",
group_by = "seurat_clusters",
split_color = setNames(sample_info$color,
nm = sample_info$project_name),
bg_pt_size = 0.5, main_pt_size = 0.5)
plot_list[[length(plot_list) + 1]] = cluster_plot
patchwork::wrap_plots(plot_list, ncol = 4) &
Seurat::NoLegend()
For each cluster, what are the differences between healthy donors (HD) and HS patients (HS) ? We save the results in a list :
list_results = list()
We set cluster of interest :
clusters_oi = c(0,8)
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
##
## HS HD
## 0 551 41
## 8 37 2
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 166 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## CRYAB 8.893175e-104 0.8348063 0.777 0.202 1.368126e-99
## TGFB2 6.853911e-98 0.7063543 0.865 0.330 1.054406e-93
## C2orf40 8.549453e-94 0.7325082 0.824 0.320 1.315248e-89
## DKK3 7.589022e-90 0.6949995 0.853 0.365 1.167495e-85
## LMCD1 4.367518e-87 0.7055574 0.926 0.447 6.718990e-83
## COPZ2 1.658421e-69 0.5110234 0.740 0.281 2.551314e-65
## KCNS3 7.705216e-70 0.5153170 0.634 0.177 1.185370e-65
## TUBB2B 1.190263e-78 0.7912262 0.532 0.083 1.831101e-74
## TNNT1 5.239810e-65 0.4841462 0.616 0.187 8.060924e-61
## GBP2 8.000738e-59 0.4598650 0.677 0.253 1.230834e-54
## MOXD1 7.461323e-63 0.5063368 0.591 0.173 1.147850e-58
## SCRG1 3.999573e-52 0.4705463 0.699 0.288 6.152943e-48
## FGL2 9.539796e-47 0.4622231 0.729 0.324 1.467602e-42
## KRT6B 1.495297e-54 0.5692765 0.840 0.445 2.300364e-50
## SLC35F3 1.598375e-51 0.4305711 0.580 0.193 2.458941e-47
## AQP3 4.903057e-53 0.6574094 0.528 0.153 7.542863e-49
## SBSPON 4.775566e-51 0.4339546 0.544 0.170 7.346731e-47
## TCEAL2 2.269967e-49 0.4934154 0.824 0.453 3.492117e-45
## TUBB2A 3.473408e-52 0.5761269 0.758 0.390 5.343491e-48
## CISD1 1.254175e-60 0.5676746 0.791 0.424 1.929423e-56
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We perform differential expression between HS and HD, within this population :
subsobj = subset(sobj, seurat_clusters %in% c(clusters_oi))
Seurat::Idents(subsobj) = subsobj$sample_type
table(subsobj$sample_type)
##
## HS HD
## 588 43
We identify specific markers for these group :
mark = Seurat::FindMarkers(subsobj, ident.1 = "HS", ident.2 = "HD")
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::arrange(-avg_logFC, pct.1 - pct.2)
list_results[[paste0("cluster_", paste(clusters_oi, collapse = "_"))]] = mark
dim(mark)
## [1] 63 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## RPS26 2.268257e-16 1.6656628 0.988 0.907 3.489487e-12
## HLA-C 1.335607e-12 0.8241266 0.764 0.186 2.054698e-08
## JUNB 8.594453e-07 0.8063700 0.643 0.233 1.322171e-02
## IFITM3 7.851009e-11 0.7522958 0.957 0.744 1.207799e-06
## MIF 7.371090e-08 0.7167384 0.587 0.186 1.133969e-03
## HLA-A 1.499358e-08 0.6667018 0.840 0.512 2.306613e-04
## MTRNR2L8 9.303815e-07 0.5730231 0.971 0.884 1.431299e-02
## C2orf40 2.500711e-07 0.5712221 0.847 0.512 3.847094e-03
## PPIA 1.829217e-11 0.5404299 0.993 0.907 2.814068e-07
## MARCKSL1 1.369276e-06 0.5095171 0.568 0.186 2.106494e-02
## NDUFS5 7.585380e-07 0.4454670 0.929 0.744 1.166935e-02
## S100A11 4.135599e-07 0.4402109 0.981 0.884 6.362205e-03
## BEX4 1.992775e-06 0.4398865 0.779 0.395 3.065684e-02
## B2M 2.785456e-06 0.4367978 0.997 1.000 4.285145e-02
## RPS19 4.608010e-13 0.3979618 1.000 1.000 7.088962e-09
## RPL7A 6.182437e-14 0.3863599 1.000 1.000 9.511061e-10
## RPS3 4.944940e-11 0.3858520 1.000 0.977 7.607296e-07
## RPL12 2.548191e-06 0.3275532 0.997 1.000 3.920137e-02
## NACA 2.112370e-06 0.3214237 0.998 0.930 3.249671e-02
## RPS3A 1.181189e-10 0.3195909 1.000 1.000 1.817140e-06
We set cluster of interest :
clusters_oi = 2
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
## HS HD
## 136 47
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 214 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## MGP 2.492025e-97 2.7463894 0.721 0.118 3.833731e-93
## ANGPTL7 1.164052e-91 2.6595942 0.536 0.050 1.790778e-87
## COMP 1.261726e-53 1.7790487 0.634 0.176 1.941039e-49
## EDN2 1.530623e-43 0.9127798 0.563 0.161 2.354710e-39
## BASP1 1.709113e-45 1.2056041 0.760 0.363 2.629300e-41
## TIMP3 4.385138e-68 1.8132960 0.934 0.552 6.746096e-64
## FGF18 6.906960e-40 1.3258349 0.470 0.114 1.062567e-35
## RAMP1 3.724275e-30 0.7112786 0.689 0.338 5.729425e-26
## PYGL 7.123456e-30 0.6808239 0.541 0.197 1.095872e-25
## SLC7A8 3.584080e-31 0.8152063 0.514 0.172 5.513749e-27
## CRLF1 1.039506e-40 0.8020407 0.432 0.092 1.599176e-36
## GJB2 4.866904e-42 1.0132899 0.863 0.526 7.487245e-38
## ISOC1 5.200844e-29 0.6789868 0.525 0.191 8.000978e-25
## LYPD6B 2.226162e-24 0.5716490 0.749 0.415 3.424727e-20
## LGR5 4.278338e-21 0.5745084 0.672 0.340 6.581795e-17
## SRM 9.813331e-32 1.0196308 0.639 0.312 1.509683e-27
## RERG 1.874176e-25 0.5098679 0.486 0.164 2.883232e-21
## ABI3BP 3.885624e-22 0.4800682 0.492 0.179 5.977643e-18
## TNFRSF12A 2.069187e-23 0.7120606 0.568 0.260 3.183237e-19
## PMEPA1 2.103526e-21 0.4925963 0.475 0.182 3.236064e-17
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We perform differential expression between HS and HD, within this population :
subsobj = subset(sobj, seurat_clusters %in% c(clusters_oi))
Seurat::Idents(subsobj) = subsobj$sample_type
table(subsobj$sample_type)
##
## HS HD
## 136 47
We identify specific markers for these group :
mark = Seurat::FindMarkers(subsobj, ident.1 = "HS", ident.2 = "HD")
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::arrange(-avg_logFC, pct.1 - pct.2)
list_results[[paste0("cluster_", paste(clusters_oi, collapse = "_"))]] = mark
dim(mark)
## [1] 17 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## MTRNR2L12 1.228805e-07 1.0695798 0.838 0.766 1.890394e-03
## RPS26 5.391848e-11 0.9666087 0.949 0.957 8.294819e-07
## MTRNR2L8 1.469810e-06 0.8650795 0.787 0.681 2.261156e-02
## HLA-C 1.308993e-06 0.5706460 0.654 0.255 2.013755e-02
## SOX13 2.826741e-06 -0.2590431 0.096 0.383 4.348659e-02
## KIF12 4.000500e-07 -0.4137205 0.096 0.404 6.154369e-03
## PDK4 1.860710e-07 -0.4184322 0.000 0.191 2.862517e-03
## IGF2BP2 1.762914e-06 -0.4335053 0.184 0.511 2.712067e-02
## SPON2 1.036798e-09 -0.5306683 0.029 0.362 1.595011e-05
## AC245014.3 2.450167e-09 -0.5654069 0.066 0.426 3.769338e-05
## RPS10 1.573924e-10 -0.6099600 0.978 0.979 2.421324e-06
## CLDN1 2.934576e-08 -0.7177683 0.441 0.787 4.514552e-04
## PTCH1 1.348989e-06 -0.7326807 0.191 0.511 2.075285e-02
## DDIT4 1.792009e-06 -0.8033103 0.632 0.872 2.756827e-02
## KLF6 3.170539e-07 -0.8875825 0.500 0.787 4.877557e-03
## AC007952.4 3.140336e-07 -1.0753929 0.368 0.681 4.831093e-03
## KRT17 1.303308e-11 -1.7796522 0.779 0.894 2.005009e-07
We set cluster of interest :
clusters_oi = 1
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
## HS HD
## 171 61
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 167 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## EPCAM 8.701500e-89 0.9414227 0.815 0.208 1.338639e-84
## SAT1 1.470636e-75 0.9821697 0.793 0.220 2.262426e-71
## BHLHE41 8.148751e-80 0.8958304 0.733 0.165 1.253604e-75
## SULT1E1 6.865256e-88 1.0138555 0.694 0.134 1.056151e-83
## SAMD11 6.647504e-111 0.5660124 0.603 0.047 1.022652e-106
## RAMP1 1.757167e-50 0.4707782 0.841 0.295 2.703226e-46
## SLC38A5 2.928872e-51 0.4813625 0.750 0.237 4.505776e-47
## IGFBP2 2.365155e-64 0.7490662 0.720 0.208 3.638554e-60
## SLC7A1 5.693704e-54 0.4669060 0.711 0.204 8.759194e-50
## ADAMTS6 2.327887e-68 0.5062058 0.586 0.099 3.581221e-64
## COL14A1 3.160395e-58 0.5380779 0.638 0.158 4.861951e-54
## WNT5A 9.804365e-61 0.5932221 0.621 0.147 1.508303e-56
## TSPAN18 9.686475e-52 0.5587526 0.668 0.196 1.490167e-47
## GFRA1 2.691764e-58 0.6459056 0.621 0.155 4.141010e-54
## PPFIBP1 1.294996e-55 0.7369063 0.884 0.421 1.992221e-51
## SMS 5.074344e-46 0.4496446 0.664 0.214 7.806370e-42
## PLPP3 2.737337e-48 0.5295214 0.638 0.191 4.211119e-44
## ETS2 1.764498e-67 0.4807835 0.526 0.082 2.714504e-63
## TGFBI 3.159564e-51 0.4474048 0.582 0.138 4.860674e-47
## LGR5 3.366746e-28 0.2740148 0.754 0.311 5.179402e-24
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We perform differential expression between HS and HD, within this population :
subsobj = subset(sobj, seurat_clusters %in% c(clusters_oi))
Seurat::Idents(subsobj) = subsobj$sample_type
table(subsobj$sample_type)
##
## HS HD
## 171 61
We identify specific markers for these group :
mark = Seurat::FindMarkers(subsobj, ident.1 = "HS", ident.2 = "HD")
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::arrange(-avg_logFC, pct.1 - pct.2)
list_results[[paste0("cluster_", paste(clusters_oi, collapse = "_"))]] = mark
dim(mark)
## [1] 140 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## RPS4Y1 7.233299e-13 1.2223882 0.661 0.066 1.112771e-08
## MTRNR2L12 2.289570e-22 1.1776324 1.000 0.984 3.522274e-18
## MTRNR2L8 9.767799e-17 0.8256264 0.971 0.803 1.502678e-12
## TIMP3 2.380418e-09 0.7115757 0.596 0.131 3.662035e-05
## S100A2 6.541007e-08 0.6696135 0.860 0.721 1.006268e-03
## MTRNR2L1 2.513883e-09 0.6532328 0.784 0.492 3.867357e-05
## MIF 1.810111e-10 0.6231717 0.696 0.230 2.784675e-06
## TXNDC17 8.026745e-08 0.6222742 0.883 0.754 1.234834e-03
## HLA-C 1.099631e-12 0.5994132 0.760 0.230 1.691672e-08
## IRX2 1.252442e-09 0.5867756 0.912 0.738 1.926757e-05
## RPS28 1.761394e-15 0.5752626 1.000 1.000 2.709728e-11
## RPL18A 3.059079e-18 0.5635677 1.000 1.000 4.706087e-14
## MT-CO2 2.060947e-11 0.5600574 0.994 1.000 3.170561e-07
## RPL12 1.498315e-15 0.5116740 1.000 1.000 2.305007e-11
## MTRNR2L10 5.986923e-08 0.5047378 0.708 0.295 9.210282e-04
## RPS14 5.325665e-17 0.5037330 1.000 1.000 8.193003e-13
## RPS17 1.557888e-08 0.4884645 1.000 1.000 2.396655e-04
## RPS7 1.350879e-17 0.4711617 1.000 0.984 2.078192e-13
## RPL23A 1.571683e-14 0.4643245 0.988 1.000 2.417878e-10
## EEF1B2 5.485023e-13 0.4455383 1.000 1.000 8.438159e-09
We set cluster of interest :
clusters_oi = 3
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
## HS HD
## 103 34
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 101 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## COL7A1 6.531033e-26 1.2655027 0.723 0.524 1.004734e-21
## ANKRD36C 2.239624e-12 0.8204873 0.372 0.176 3.445438e-08
## VASH1 9.719204e-08 0.7353278 0.394 0.263 1.495202e-03
## PKD1 2.670178e-07 0.5959187 0.372 0.243 4.107802e-03
## GOLGA8B 1.372865e-06 0.7152186 0.314 0.191 2.112016e-02
## ARHGEF10 6.809387e-07 0.7110796 0.372 0.251 1.047556e-02
## NBEAL2 1.502880e-07 0.3438166 0.175 0.062 2.312030e-03
## UBR4 1.096193e-06 0.6459494 0.394 0.282 1.686383e-02
## MTRNR2L1 6.570316e-21 1.1128743 0.774 0.665 1.010777e-16
## XIST 1.251196e-18 1.3722485 0.679 0.574 1.924840e-14
## ITPR2 3.334536e-10 0.8364656 0.562 0.460 5.129850e-06
## PHACTR1 1.820119e-08 0.6825739 0.511 0.415 2.800071e-04
## MACF1 1.814596e-36 1.0909726 0.876 0.782 2.791575e-32
## PTPRF 2.014365e-11 0.7192972 0.606 0.517 3.098899e-07
## SH3PXD2A 3.921949e-10 0.8207961 0.555 0.470 6.033527e-06
## COL12A1 2.171823e-06 0.8041452 0.445 0.361 3.341133e-02
## SMG1 3.945108e-11 0.8516457 0.577 0.495 6.069154e-07
## TNC 2.433094e-15 0.8954344 0.745 0.664 3.743071e-11
## FTX 2.366197e-12 0.9393048 0.613 0.538 3.640157e-08
## PLEC 1.557429e-12 0.7958488 0.628 0.557 2.395949e-08
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We perform differential expression between HS and HD, within this population :
subsobj = subset(sobj, seurat_clusters %in% c(clusters_oi))
Seurat::Idents(subsobj) = subsobj$sample_type
table(subsobj$sample_type)
##
## HS HD
## 103 34
We identify specific markers for these group :
mark = Seurat::FindMarkers(subsobj, ident.1 = "HS", ident.2 = "HD")
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::arrange(-avg_logFC, pct.1 - pct.2)
list_results[[paste0("cluster_", paste(clusters_oi, collapse = "_"))]] = mark
dim(mark)
## [1] 5 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## MTRNR2L8 2.221313e-08 0.9784437 0.971 0.882 3.417267e-04
## MTRNR2L12 1.367553e-07 0.7155726 1.000 0.941 2.103844e-03
## RHOB 8.321477e-07 -0.9281971 0.330 0.735 1.280176e-02
## KLF6 1.722485e-12 -1.7195677 0.379 0.912 2.649871e-08
## SGK1 1.330925e-07 -2.0690477 0.282 0.676 2.047495e-03
Small clusters contain not enough cells to compare HD and HS, but we can try to identify specific markers.
We set cluster of interest :
clusters_oi = 5
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
## HS HD
## 43 21
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 170 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## KRT75 4.261481e-82 2.6425233 0.875 0.101 6.555862e-78
## SULF2 4.517836e-37 0.6274183 0.750 0.147 6.950239e-33
## CD24 7.950250e-44 0.7102213 0.672 0.102 1.223066e-39
## ALCAM 3.398661e-49 0.8414933 0.641 0.079 5.228500e-45
## NECTIN4 1.132459e-71 0.5006038 0.578 0.035 1.742176e-67
## DSG1 1.582596e-67 0.9318441 0.578 0.040 2.434665e-63
## ALDH2 8.100503e-28 0.8544061 0.891 0.369 1.246181e-23
## COL27A1 8.911981e-19 0.6993912 0.781 0.278 1.371019e-14
## TUBA4A 3.929171e-21 0.6956015 0.766 0.264 6.044636e-17
## NRP2 8.392743e-28 0.8803858 0.625 0.144 1.291140e-23
## CUX1 6.035228e-27 0.9231917 0.625 0.148 9.284595e-23
## NOTCH3 2.092677e-50 0.3759649 0.516 0.043 3.219374e-46
## CLDN1 6.364765e-15 0.6859477 0.812 0.343 9.791555e-11
## TFAP2B 6.792002e-17 0.6717193 0.875 0.413 1.044882e-12
## ZNF750 2.895948e-60 0.5804328 0.484 0.029 4.455126e-56
## RHOV 1.378067e-59 0.6346190 0.484 0.030 2.120018e-55
## HOPX 7.308344e-18 0.8920688 0.859 0.407 1.124316e-13
## LGR5 3.940750e-11 0.3233861 0.812 0.362 6.062450e-07
## TM4SF1 1.472294e-20 1.0866761 0.859 0.413 2.264977e-16
## SEPT11 5.175101e-19 0.7978541 0.797 0.352 7.961375e-15
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We set cluster of interest :
clusters_oi = 6
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
## HS HD
## 41 12
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 379 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## NOTCH1 3.494599e-43 1.2124688 0.736 0.124 5.376092e-39
## FLNB 8.873739e-33 1.7683509 0.906 0.344 1.365136e-28
## NOTCH3 4.668186e-60 1.0439873 0.585 0.044 7.181538e-56
## GRHL1 1.093699e-37 1.4300971 0.642 0.105 1.682547e-33
## CMYA5 1.222744e-24 1.5442306 0.717 0.218 1.881070e-20
## FAM129A 5.322588e-40 1.0668158 0.547 0.064 8.188270e-36
## ABI3BP 1.448411e-19 1.0344985 0.679 0.201 2.228235e-15
## MTSS1 1.737633e-25 1.2858094 0.868 0.390 2.673174e-21
## SFXN5 8.051626e-29 0.9838617 0.585 0.111 1.238662e-24
## MRC2 1.106825e-24 1.6737178 0.830 0.383 1.702739e-20
## PLEKHG3 1.415801e-20 1.0646931 0.642 0.200 2.178068e-16
## CNKSR3 6.452408e-29 1.1084106 0.528 0.088 9.926384e-25
## NECTIN4 5.497905e-42 0.9531383 0.472 0.043 8.457977e-38
## SULF2 5.034367e-21 1.1926350 0.585 0.158 7.744870e-17
## STON2 5.577013e-19 0.8646229 0.585 0.171 8.579676e-15
## FABP5 6.371424e-22 1.6432462 0.849 0.438 9.801798e-18
## PDZRN3 2.943543e-24 0.8262355 0.491 0.089 4.528346e-20
## DIAPH2 5.071946e-17 0.9115014 0.623 0.224 7.802682e-13
## CASZ1 5.112385e-18 1.0620881 0.774 0.376 7.864892e-14
## CELSR2 1.075819e-18 1.2863026 0.736 0.341 1.655040e-14
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We set cluster of interest :
clusters_oi = 7
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
## HS HD
## 41 2
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 17 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## SCGB2A2 7.647401e-44 2.3207587 0.558 0.048 1.176476e-39
## CAMK2N1 1.635715e-17 0.5294409 0.465 0.082 2.516384e-13
## NPPC 1.790106e-12 1.6218556 0.698 0.320 2.753898e-08
## NPTX2 2.111608e-10 0.4160402 0.535 0.164 3.248498e-06
## KRT6C 7.926649e-09 0.4073147 0.535 0.185 1.219436e-04
## POSTN 9.707565e-13 1.7186060 0.814 0.468 1.493412e-08
## PCDH7 1.515280e-06 0.5112558 0.581 0.240 2.331106e-02
## TIMP1 1.322974e-06 0.3892821 0.744 0.432 2.035262e-02
## PLCB1 1.938641e-06 0.2896887 0.488 0.187 2.982406e-02
## CHI3L1 1.150496e-06 0.9322679 0.535 0.242 1.769922e-02
## ADIRF 1.828850e-06 0.6303150 0.651 0.368 2.813503e-02
## RFLNA 2.732072e-06 0.2655561 0.372 0.116 4.203020e-02
## NR4A1 6.133495e-08 0.4890826 0.326 0.085 9.435768e-04
## HLA-DRA 5.218463e-09 1.1929512 0.233 0.043 8.028083e-05
## CDC25B 1.291361e-07 0.2835974 0.233 0.047 1.986630e-03
## ZFHX4 2.515833e-06 0.3477340 0.116 0.016 3.870357e-02
## S100A6 2.397557e-08 0.5459797 0.977 0.907 3.688402e-04
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We set cluster of interest :
clusters_oi = 8
table(sobj$seurat_clusters, sobj$sample_type)[clusters_oi + 1, ]
## HS HD
## 37 2
What is the cluster identity ?
mark = Seurat::FindMarkers(sobj, ident.1 = clusters_oi)
mark = mark %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(avg_logFC > 0) %>%
dplyr::arrange(-(pct.1 - pct.2), -avg_logFC)
dim(mark)
## [1] 101 5
head(mark, n = 20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## PTHLH 2.299174e-94 1.4808986 0.897 0.052 3.537049e-90
## AQP3 2.438416e-32 1.8302392 1.000 0.297 3.751259e-28
## FST 2.061507e-26 1.2373030 0.949 0.254 3.171422e-22
## TMEM45A 4.320274e-27 0.9680393 0.769 0.167 6.646310e-23
## C1QTNF12 4.390215e-19 0.7919568 0.821 0.243 6.753907e-15
## VIM 1.969431e-22 1.4872649 0.923 0.360 3.029773e-18
## CRABP2 4.165189e-18 0.7416611 0.744 0.208 6.407726e-14
## ANKH 4.422814e-18 0.5728452 0.718 0.188 6.804057e-14
## C19orf33 1.099493e-13 0.7194400 0.872 0.356 1.691460e-09
## ODC1 4.704920e-21 0.6689324 0.641 0.128 7.238048e-17
## CYR61 6.040557e-19 0.5846073 0.641 0.146 9.292794e-15
## RND3 1.805197e-15 0.6701467 0.692 0.208 2.777115e-11
## MOXD1 8.470459e-14 0.6805826 0.821 0.341 1.303095e-09
## SIRT2 1.707509e-10 0.4731114 0.821 0.362 2.626832e-06
## BDNF 8.552560e-14 0.5160020 0.615 0.162 1.315726e-09
## C16orf74 1.435042e-15 0.4744279 0.590 0.140 2.207669e-11
## TNFRSF18 3.500544e-15 0.4155453 0.590 0.140 5.385237e-11
## TPD52L1 3.573035e-11 0.3931087 0.692 0.242 5.496757e-07
## ABRACL 1.468416e-14 0.7629582 0.949 0.519 2.259011e-10
## SOCS3 3.094980e-08 0.3219107 0.692 0.265 4.761317e-04
We visualize expression for top 9 genes :
plot_list = lapply(rownames(mark)[c(1:9)] %>% na.omit(), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = name2D) +
ggplot2::theme(aspect.ratio = 1) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes()
})
patchwork::wrap_plots(plot_list, ncol = 3)
We represent differentially expressed genes. First, we extract all DE genes :
features_oi = lapply(list_results, FUN = rownames) %>%
unlist() %>% unname() %>% unique()
length(features_oi)
## [1] 175
We prepare the scaled expression matrix :
mat_expression = Seurat::GetAssayData(sobj, assay = "RNA", slot = "data")[features_oi, ]
mat_expression = Matrix::t(mat_expression)
mat_expression = dynutils::scale_quantile(mat_expression) # between 0 and 1
mat_expression = Matrix::t(mat_expression)
mat_expression = as.matrix(mat_expression) # not sparse
dim(mat_expression)
## [1] 175 1454
We prepare the heatmap annotation :
ha_top = ComplexHeatmap::HeatmapAnnotation(
sample_type = sobj$sample_type,
cluster = sobj$seurat_clusters,
col = list(sample_type = setNames(nm = c("HS", "HD"),
c("#C55F40", "#2C78E6")),
cluster = setNames(nm = levels(sobj$seurat_clusters),
aquarius::gg_color_hue(length(levels(sobj$seurat_clusters))))))
And the heatmap :
sobj$cell_group = paste0(sobj$sample_type, sobj$seurat_clusters) %>%
as.factor()
ht = ComplexHeatmap::Heatmap(mat_expression,
col = aquarius::color_cnv,
# Annotation
top_annotation = ha_top,
# Grouping
column_order = sobj@meta.data %>%
dplyr::arrange(sample_type, seurat_clusters) %>%
rownames(),
column_split = sobj$cell_group,
column_gap = unit.c(rep(unit(0.01, "mm"), 8),
unit(2, "mm"),
rep(unit(0.01, "mm"), 8)),
column_title = NULL,
cluster_rows = FALSE,
cluster_columns = FALSE,
show_column_names = FALSE,
# Visual aspect
show_heatmap_legend = TRUE,
border = TRUE)
ComplexHeatmap::draw(ht,
merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "bottom")
features_oi_pos = lapply(list_results, FUN = function(df) {
features_pos = df %>% dplyr::filter(avg_logFC > 0) %>%
rownames()
return(features_pos)
}) %>% unlist() %>% unname() %>% unique() %>% sort()
features_oi_neg = lapply(list_results, FUN = function(df) {
features_neg = df %>% dplyr::filter(avg_logFC < 0) %>%
rownames()
return(features_neg)
}) %>% unlist() %>% unname() %>% unique() %>% sort()
Seurat::DotPlot(sobj,
assay = "RNA",
features = c(features_oi_pos, features_oi_neg),
group.by = "sample_type") +
ggplot2::coord_flip() +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1))
We save the list of results :
saveRDS(list_results, file = paste0(out_dir, "/", save_name, "_list_results.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] org.Mm.eg.db_3.10.0 AnnotationDbi_1.48.0 IRanges_2.20.2
## [4] S4Vectors_0.24.4 Biobase_2.46.0 BiocGenerics_0.32.0
## [7] ComplexHeatmap_2.14.0 ggplot2_3.3.5 patchwork_1.1.2
## [10] dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 scater_1.14.6
## [33] irlba_2.3.3 DEoptimR_1.0-9
## [35] tidygraph_1.1.2 Rcpp_1.0.9
## [37] readr_2.0.2 KernSmooth_2.23-17
## [39] carrier_0.1.0 promises_1.1.0
## [41] gdata_2.18.0 DelayedArray_0.12.3
## [43] limma_3.42.2 graph_1.64.0
## [45] RcppParallel_5.1.4 Hmisc_4.4-0
## [47] fs_1.5.2 RSpectra_0.16-0
## [49] fastmatch_1.1-0 ranger_0.12.1
## [51] digest_0.6.25 png_0.1-7
## [53] sctransform_0.2.1 cowplot_1.0.0
## [55] DOSE_3.12.0 here_1.0.1
## [57] TInGa_0.0.0.9000 ggraph_2.0.3
## [59] pkgconfig_2.0.3 GO.db_3.10.0
## [61] DelayedMatrixStats_1.8.0 gower_0.2.1
## [63] ggbeeswarm_0.6.0 iterators_1.0.12
## [65] DropletUtils_1.6.1 reticulate_1.26
## [67] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [69] circlize_0.4.15 beeswarm_0.4.0
## [71] GetoptLong_1.0.5 xfun_0.35
## [73] bslib_0.3.1 zoo_1.8-10
## [75] tidyselect_1.1.0 reshape2_1.4.4
## [77] purrr_0.3.4 ica_1.0-2
## [79] pcaPP_1.9-73 viridisLite_0.3.0
## [81] rtracklayer_1.46.0 rlang_1.0.2
## [83] hexbin_1.28.1 jquerylib_0.1.4
## [85] dyneval_0.9.9 glue_1.4.2
## [87] RColorBrewer_1.1-2 matrixStats_0.56.0
## [89] stringr_1.4.0 lava_1.6.7
## [91] europepmc_0.3 DESeq2_1.26.0
## [93] recipes_0.1.17 labeling_0.3
## [95] httpuv_1.5.2 class_7.3-17
## [97] BiocNeighbors_1.4.2 DO.db_2.9
## [99] annotate_1.64.0 jsonlite_1.7.2
## [101] XVector_0.26.0 bit_4.0.4
## [103] mime_0.9 aquarius_0.1.5
## [105] Rsamtools_2.2.3 gridExtra_2.3
## [107] gplots_3.0.3 stringi_1.4.6
## [109] processx_3.5.2 gsl_2.1-6
## [111] bitops_1.0-6 cli_3.0.1
## [113] batchelor_1.2.4 RSQLite_2.2.0
## [115] randomForest_4.6-14 tidyr_1.1.4
## [117] data.table_1.14.2 rstudioapi_0.13
## [119] GenomicAlignments_1.22.1 nlme_3.1-147
## [121] qvalue_2.18.0 scran_1.14.6
## [123] locfit_1.5-9.4 scDblFinder_1.1.8
## [125] listenv_0.8.0 ggthemes_4.2.4
## [127] gridGraphics_0.5-0 R.oo_1.24.0
## [129] dbplyr_1.4.4 TTR_0.24.2
## [131] readxl_1.3.1 lifecycle_1.0.1
## [133] timeDate_3043.102 ggpattern_0.3.1
## [135] munsell_0.5.0 cellranger_1.1.0
## [137] R.methodsS3_1.8.1 proxyC_0.1.5
## [139] visNetwork_2.0.9 caTools_1.18.0
## [141] codetools_0.2-16 GenomeInfoDb_1.22.1
## [143] vipor_0.4.5 lmtest_0.9-38
## [145] msigdbr_7.5.1 htmlTable_1.13.3
## [147] triebeard_0.3.0 lsei_1.2-0
## [149] xtable_1.8-4 ROCR_1.0-7
## [151] BiocManager_1.30.10 scatterplot3d_0.3-41
## [153] abind_1.4-5 farver_2.0.3
## [155] parallelly_1.28.1 RANN_2.6.1
## [157] askpass_1.1 GenomicRanges_1.38.0
## [159] RcppAnnoy_0.0.16 tibble_3.1.5
## [161] ggdendro_0.1-20 cluster_2.1.0
## [163] future.apply_1.5.0 Seurat_3.1.5
## [165] dendextend_1.15.1 Matrix_1.3-2
## [167] ellipsis_0.3.2 prettyunits_1.1.1
## [169] lubridate_1.7.9 ggridges_0.5.2
## [171] igraph_1.2.5 RcppEigen_0.3.3.7.0
## [173] fgsea_1.12.0 remotes_2.4.2
## [175] scBFA_1.0.0 destiny_3.0.1
## [177] VIM_6.1.1 testthat_3.1.0
## [179] htmltools_0.5.2 BiocFileCache_1.10.2
## [181] yaml_2.2.1 utf8_1.1.4
## [183] plotly_4.9.2.1 XML_3.99-0.3
## [185] ModelMetrics_1.2.2.2 e1071_1.7-3
## [187] foreign_0.8-76 withr_2.5.0
## [189] fitdistrplus_1.0-14 BiocParallel_1.20.1
## [191] xgboost_1.4.1.1 bit64_4.0.5
## [193] foreach_1.5.0 robustbase_0.93-9
## [195] Biostrings_2.54.0 GOSemSim_2.13.1
## [197] rsvd_1.0.3 memoise_2.0.0
## [199] evaluate_0.18 forcats_0.5.0
## [201] rio_0.5.16 geneplotter_1.64.0
## [203] tzdb_0.1.2 caret_6.0-86
## [205] ps_1.6.0 DiagrammeR_1.0.6.1
## [207] curl_4.3 fdrtool_1.2.15
## [209] fansi_0.4.1 highr_0.8
## [211] urltools_1.7.3 xts_0.12.1
## [213] GSEABase_1.48.0 acepack_1.4.1
## [215] edgeR_3.28.1 checkmate_2.0.0
## [217] scds_1.2.0 cachem_1.0.6
## [219] npsurv_0.4-0 babelgene_22.3
## [221] rjson_0.2.20 openxlsx_4.1.5
## [223] ggrepel_0.9.1 clue_0.3-60
## [225] rprojroot_2.0.2 stabledist_0.7-1
## [227] tools_3.6.3 sass_0.4.0
## [229] nichenetr_1.1.1 magrittr_2.0.1
## [231] RCurl_1.98-1.2 proxy_0.4-24
## [233] car_3.0-11 ape_5.3
## [235] ggplotify_0.0.5 xml2_1.3.2
## [237] httr_1.4.2 assertthat_0.2.1
## [239] rmarkdown_2.18 boot_1.3-25
## [241] globals_0.14.0 R6_2.4.1
## [243] Rhdf5lib_1.8.0 nnet_7.3-14
## [245] RcppHNSW_0.2.0 progress_1.2.2
## [247] genefilter_1.68.0 statmod_1.4.34
## [249] gtools_3.8.2 shape_1.4.6
## [251] HDF5Array_1.14.4 BiocSingular_1.2.2
## [253] rhdf5_2.30.1 splines_3.6.3
## [255] AUCell_1.8.0 carData_3.0-4
## [257] colorspace_1.4-1 generics_0.1.0
## [259] base64enc_0.1-3 dynfeature_1.0.0
## [261] smoother_1.1 gridtext_0.1.1
## [263] pillar_1.6.3 tweenr_1.0.1
## [265] sp_1.4-1 ggplot.multistats_1.0.0
## [267] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [269] plyr_1.8.6 gtable_0.3.0
## [271] zip_2.2.0 knitr_1.41
## [273] latticeExtra_0.6-29 biomaRt_2.42.1
## [275] fastmap_1.1.0 ADGofTest_0.3
## [277] copula_1.0-0 doParallel_1.0.15
## [279] vcd_1.4-8 babelwhale_1.0.1
## [281] openssl_1.4.1 scales_1.1.1
## [283] backports_1.2.1 ipred_0.9-12
## [285] enrichplot_1.6.1 hms_1.1.1
## [287] ggforce_0.3.1 Rtsne_0.15
## [289] shiny_1.7.1 numDeriv_2016.8-1.1
## [291] polyclip_1.10-0 lazyeval_0.2.2
## [293] Formula_1.2-3 tsne_0.1-3
## [295] crayon_1.3.4 MASS_7.3-54
## [297] pROC_1.16.2 viridis_0.5.1
## [299] dynparam_1.0.0 rpart_4.1-15
## [301] zinbwave_1.8.0 compiler_3.6.3
## [303] ggtext_0.1.0